* Session settings: reset all timers and switch off --more-- paging
timer clear
set more off

* Directory layout: every path is derived from $base
global base "/directory"
global DoPth "${base}/dofiles/"
global PthIn "${base}/data/"
global PthOut "${base}/output/"

* Sample-period constants: first year, first year plus one, last year
global starty 1976
global startyplus 1977
global endy 2011

*******************************
*** RESULTS FROM APPENDIX A ***
*******************************

****************************************************************
*** TABLE A1 - ALTERNATIVE MEASURES OF VOLATILITY AND GROWTH ***
****************************************************************

* Note: - A disadvantage of the "blocking" method by Koren and Tenreyro (2013) is that potentially useful variation in size (and other covariates)
*	  cannot be fully exploited
*		- Some within-firm variation is discarded by constructing time blocks
*	- To deal with this, we adopt the "rolling volatility" method of Davis et al. (2006) and Thesmar and Thoenig (2011)
*	- Under this methodology, volatility can be defined at the annual level
*		- Volatility at year t is the standard deviation of growth rates within a rolling window from t-4 to t+5
*		  However, we do correct for the degrees of freedom within each rolling window
*		- If the number of observations is less than 6 (i.e., 5 or fewer), it is simply the standard measure of volatility and
*		  there will be no within-firm variation in volatility

*	- We implement this methodology by using the rolling volatility definition from Thesmar and Thoenig (2011)
*	- The methodology by Davis et al. (2006) makes two adjustments:
*		- a "weighted" degrees-of-freedom correction
*		- deviations in the standard deviation are taken from a weighted mean
*	- These adjustments imply that volatilities are "too smooth", which biases the coefficient on size downward

*** Table A1 - First and second columns
* Blocking approach: one observation per firm and time block, where volatility
* is the within-block s.d. of a growth rate and size is the within-block mean.
use "$PthOut/firm_LBD_master2.dta", clear

* Time-block index. Every observation starts in block 1; the loop reassigns
* 1982-1986 -> 2, 1987-1991 -> 3, ..., 2007-2011 -> 7.
generate block2 = 1
forvalues b = 2/7 {
	local after  = 1971 + 5*`b'
	local before = 1977 + 5*`b'
	replace block2 = `b' if year>`after' & year<`before'
}
* 1976 and 2012 onward get no block (">=" is also true for a missing year)
replace block2 = . if year==1976 | year>=2012

* Volatility and size based on emp_gr_ln2 / emp, and their logs
bysort firmid block2: egen stdev_gr = sd(emp_gr_ln2)
generate lstdev_gr = ln(stdev_gr)
bysort firmid block2: egen avg_size = mean(emp)
generate lavg_size = ln(avg_size)

* Same construction based on emp_gr_DHS2 / emp_DHS2
bysort firmid block2: egen stdev_gr2 = sd(emp_gr_DHS2)
generate lstdev_gr2 = ln(stdev_gr2)
bysort firmid block2: egen avg_size2 = mean(emp_DHS2)
generate lavg_size2 = ln(avg_size2)

* Running counter within each firm-block cell
bysort firmid block2: generate f_obs2 = _n

* Collapse to "firmid-time block" (rather than "firmid-year") level by keeping
* the first row of every cell that has a block assigned
keep if block2!=. & f_obs2==1

* Column 1 uses lstdev_gr, column 2 uses lstdev_gr2; both regress on lavg_size
* with block and firm fixed effects, clustering on fk_naics12_3.
* NOTE(review): lavg_size2 is created above but never enters a regression -
* confirm that column 2 is meant to use lavg_size rather than lavg_size2.
foreach depvar in lstdev_gr lstdev_gr2 {
	reghdfe `depvar' lavg_size, absorb(block2 firmid) vce(cluster fk_naics12_3)
}

*** Table A1 - Third and fourth columns
* Rolling-volatility datasets: the same specification is run on each file in
* turn (column 3 = rollvol, column 4 = rollvol_DHS). Both files already carry
* an annual stdev_gr, so only logs are taken here.
foreach dataset in firm_LBD_master2_rollvol firm_LBD_master2_rollvol_DHS {

	use "$PthOut/`dataset'.dta", clear

	* Log volatility and log size
	generate lstdev_gr = ln(stdev_gr)
	generate lsize = ln(emp)

	* Year and firm fixed effects, clustering on fk_naics12_3
	reghdfe lstdev_gr lsize, absorb(year firmid) vce(cluster fk_naics12_3)
}

***********************************************
*** TABLE A2 - HIGHER-ORDER LOG POLYNOMIALS ***
***********************************************
use "$PthOut/firm_LBD_master2.dta", clear

* Time-block index. Every observation starts in block 1; the loop reassigns
* 1982-1986 -> 2, 1987-1991 -> 3, ..., 2007-2011 -> 7.
generate block2 = 1
forvalues b = 2/7 {
	local after  = 1971 + 5*`b'
	local before = 1977 + 5*`b'
	replace block2 = `b' if year>`after' & year<`before'
}
* 1976 and 2012 onward get no block (">=" is also true for a missing year)
replace block2 = . if year==1976 | year>=2012

* Within-cell volatility (s.d. of emp_gr_ln2) and its log
bysort firmid block2: egen stdev_gr = sd(emp_gr_ln2)
generate lstdev_gr = ln(stdev_gr)

* Within-cell mean of emp, its log, and the squared and cubed log
bysort firmid block2: egen avg_size = mean(emp)
generate lavg_size = ln(avg_size)
generate lavg_size2 = lavg_size^2
generate lavg_size3 = lavg_size^3

* Running counter within each firm-block cell
bysort firmid block2: generate f_obs2 = _n

* Collapse to "firmid-time block" (rather than "firmid-year") level
keep if block2!=. & f_obs2==1

* Add one polynomial term per column: linear, then quadratic, then cubic.
* Block and firm fixed effects, clustering on fk_naics12_3.
local rhs
foreach term in lavg_size lavg_size2 lavg_size3 {
	local rhs `rhs' `term'
	reghdfe lstdev_gr `rhs', absorb(block2 firmid) vce(cluster fk_naics12_3)
}

************************************************
*** TABLE A3 - SUBSAMPLE OF ONLY LARGE FIRMS ***
************************************************
* Re-runs the block-level regression of log volatility on log size for two
* subsamples: cells with avg_size >= 5000 and cells with avg_size >= 10000.
use "$PthOut/firm_LBD_master2.dta", clear

* Time-block index: block 1 by default, then 1982-1986 -> 2, ..., 2007-2011 -> 7
gen block2 = 1
replace block2 = 2 if year>1981 & year<1987
replace block2 = 3 if year>1986 & year<1992
replace block2 = 4 if year>1991 & year<1997
replace block2 = 5 if year>1996 & year<2002
replace block2 = 6 if year>2001 & year<2007
replace block2 = 7 if year>2006 & year<2012
* 1976 and 2012 onward get no block (">=" is also true for a missing year)
replace block2 = . if year==1976 | year>=2012

* Within-cell volatility (s.d. of emp_gr_ln2) and size (mean of emp)
bys firmid block2: egen stdev_gr = sd(emp_gr_ln2)
bys firmid block2: egen avg_size = mean(emp)

gen lstdev_gr = ln(stdev_gr)
gen lavg_size = ln(avg_size)

* Running counter within each firm-block cell. The dataset was reloaded above,
* so f_obs2 has to be created here before the filter below can reference it.
bys firmid block2: gen f_obs2 = _n

* Collapse to "firmid-time block" (rather than "firmid-year") level
keep if f_obs2==1 & block2!=.

* First subsample: run inside preserve/restore so the full collapsed sample is
* available again for the second threshold
preserve

	keep if avg_size>=5000
	reghdfe lstdev_gr lavg_size, absorb(block2 firmid) vce(cluster fk_naics12_3)
	
restore

* Second subsample
keep if avg_size>=10000
reghdfe lstdev_gr lavg_size, absorb(block2 firmid) vce(cluster fk_naics12_3)

